import pandas as pd
import numpy as np
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
import plotly.offline as pyo
from plotly.subplots import make_subplots
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
# Default size (in inches) for every matplotlib figure drawn below.
plt.rcParams.update({"figure.figsize": (12, 6)})

# Load the raw bookings table; the path is relative to the working directory.
data = pd.read_csv("hotel_bookings.csv")

# First look at the table: leading rows, dimensions, missing values per column.
data.head()
data.shape
data.isnull().sum()

# Column names, dtypes and non-null counts.
data.info()
# Treat a missing `children` value as zero so the column can be stored as
# integers. The result is assigned back to the column instead of calling
# `fillna(..., inplace=True)` on `data["children"]`: that chained in-place
# form operates on an intermediate object, triggers a FutureWarning on recent
# pandas and leaves `data` unchanged once copy-on-write is enabled.
data["children"] = data["children"].fillna(0).astype(int)

# Drop bookings that list no guests at all (0 adults, 0 children, 0 babies).
# The mask is deliberately not named `filter`, which would shadow the builtin.
no_guests = data[["adults", "children", "babies"]].eq(0).all(axis=1)
data = data[~no_guests]
data.shape
# Work on a copy so the cleaned `data` frame is not modified by the
# feature engineering below.
df = data.copy()

# Assemble the arrival date from its year / month / day-of-month columns and
# let pandas parse the resulting "year-month-day" strings.
df["datetime"] = pd.to_datetime(
    df["arrival_date_year"].astype(str)
    + "-"
    + df["arrival_date_month"].astype(str)
    + "-"
    + df["arrival_date_day_of_month"].astype(str)
)

# "YYYY-MM" label used later to bucket bookings by arrival month.
df["month-year"] = df["datetime"].dt.strftime("%Y-%m")
df = df.sort_values("datetime")

## Replacing Undefined meal type with SC
# Assigned back to the column: `df["meal"].replace(..., inplace=True)` is a
# chained in-place call that warns on recent pandas and does not update `df`
# under copy-on-write.
df["meal"] = df["meal"].replace("Undefined", "SC")

# Party size and length of stay (weekend nights + week nights) per booking.
df["Total Guests"] = df["adults"] + df["children"] + df["babies"]
df["Total Stay"] = df["stays_in_weekend_nights"] + df["stays_in_week_nights"]
## One frame per hotel type: city (`ch`) and resort (`rh`)
ch = df.loc[df["hotel"] == "City Hotel"]
rh = df.loc[df["hotel"] == "Resort Hotel"]

# Bar chart with one bar per reservation status of the resort bookings;
# `rh_status` keeps the matplotlib Axes returned by the plot call.
rh_status = rh["reservation_status"].value_counts().plot.bar()

# Raw counts behind the chart: cancellation flag and reservation status.
rh["is_canceled"].value_counts()
rh["reservation_status"].value_counts()
# Resort bookings whose cancellation flag is 0.
rh_checked = rh.loc[rh["is_canceled"] == 0]
rh_checked["is_canceled"].value_counts()

# Number of such bookings per "month-year" bucket, as a one-column frame.
rh_time = rh_checked.groupby("month-year").size().to_frame("count")

fig = px.line(
    rh_time,
    x=rh_time.index,
    y="count",
    template="plotly_dark",
    title="Time Wise Count of guests checking in at Resort Hotel",
)
fig.show(template="plotly_dark")
# City-hotel bookings whose cancellation flag is 0.
ch_checked = ch.loc[ch["is_canceled"] == 0]

# Number of such bookings per "month-year" bucket, as a one-column frame.
ch_time = ch_checked.groupby("month-year").size().to_frame("count")

fig = px.line(
    ch_time,
    x=ch_time.index,
    y="count",
    template="plotly_dark",
    title="Time wise count of guests that checked in at City hotel",
)
fig.show(template="plotly_dark")
# Bookings per origin country among non-cancelled resort bookings.
# `rename_axis(None).to_frame("country")` pins the layout the rest of this
# block relies on: country values in an unnamed index, counts in a column
# named "country". A bare `to_frame()` names the column "count" on
# pandas >= 2.0, which made the `["country"]` / `.country` lookups fail.
rh_country = rh_checked["country"].value_counts().rename_axis(None).to_frame("country")
total_rh = len(rh_checked)

# Each country's share of all non-cancelled resort bookings, in percent.
rh_country["Percentage of count"] = (rh_country["country"] / total_rh * 100).round(2)
rh_country.head(5)

fig_map = px.choropleth(
    rh_country,
    locations=rh_country.index,
    color="Percentage of count",
    hover_data=[rh_country.index, rh_country["country"]],
    color_continuous_scale=px.colors.sequential.Plasma,
    title="Density of origin countries of guests - Resort Hotel",
    template="plotly_dark",
)
fig_map.show()
# Bookings per origin country among non-cancelled city-hotel bookings.
# `rename_axis(None).to_frame("country")` pins the layout the rest of this
# block relies on: country values in an unnamed index, counts in a column
# named "country". A bare `to_frame()` names the column "count" on
# pandas >= 2.0, which made the `["country"]` / `.country` lookups fail.
ch_country = ch_checked["country"].value_counts().rename_axis(None).to_frame("country")
total_ch = len(ch_checked)

# Each country's share of all non-cancelled city-hotel bookings, in percent.
ch_country["Percentage count"] = (ch_country["country"] / total_ch * 100).round(2)

fig_map = px.choropleth(
    ch_country,
    locations=ch_country.index,
    color="Percentage count",
    hover_data=[ch_country.index, ch_country["country"]],
    color_continuous_scale=px.colors.sequential.Plasma,
    title="Density of origin countries of guests - City Hotel",
    template="plotly_dark",
)
fig_map.show()
## Determining which countries have the most cancellations
rh_canceled = rh[rh["is_canceled"] == 1]
ch_canceled = ch[ch["is_canceled"] == 1]

# Ten most frequent countries among cancelled bookings, per hotel.
# `rename_axis(None).to_frame("country")` keeps the counts in a column named
# "country" with an unnamed index on every pandas version; a bare
# `to_frame()` calls that column "count" on pandas >= 2.0 and broke the
# `["country"]` / `.country` lookups below.
rh_canceled_country = (
    rh_canceled["country"].value_counts().rename_axis(None).to_frame("country").head(10)
)
ch_canceled_country = (
    ch_canceled["country"].value_counts().rename_axis(None).to_frame("country").head(10)
)

# Share of each hotel's cancelled bookings, in percent.
rh_canceled_country["Percentage"] = (
    rh_canceled_country["country"] / len(rh_canceled) * 100
).round(2)
ch_canceled_country["Percentage"] = (
    ch_canceled_country["country"] / len(ch_canceled) * 100
).round(2)

fig = px.bar(
    rh_canceled_country,
    x=rh_canceled_country.index,
    y="Percentage",
    hover_data=[rh_canceled_country["country"]],
    title="Top 10 countries with most cancellations - Resort Hotel",
    template="plotly_dark",
)
fig.show()

fig = px.bar(
    ch_canceled_country,
    x=ch_canceled_country.index,
    y="Percentage",
    hover_data=[ch_canceled_country["country"]],
    title="Top 10 countries with most cancellations - City Hotel",
    template="plotly_dark",
)
fig.show()
# Bookings from both hotels whose cancellation flag is 0.
df_checked = df.loc[df["is_canceled"] == 0]

# Box plot of `adr` per reserved room type, coloured by hotel.
fig = px.box(
    df_checked,
    x="reserved_room_type",
    y="adr",
    color="hotel",
    template="plotly_dark",
)
fig.show()
## Trend of average daily rate over time
# Mean `adr` per arrival month and hotel, flattened to plain columns.
df_adr_time = df_checked.groupby(["month-year", "hotel"])["adr"].mean().reset_index()
df_adr_time.head(5)
fig = px.line(
    df_adr_time,
    x="month-year",
    y="adr",
    color="hotel",
    title="Average daily rate over time",
    template="plotly_dark",
)
fig.show()

## Trend of non-cancelled bookings over time
# Number of non-cancelled bookings per arrival month and hotel.
df_time = df_checked.groupby(["month-year", "hotel"]).size().reset_index(name="count")
# This chart plots booking counts, so it gets its own title; it previously
# reused "Average daily rate over time" from the chart above.
fig = px.line(
    df_time,
    x="month-year",
    y="count",
    color="hotel",
    title="Number of check-ins over time",
    template="plotly_dark",
)
fig.show()
## Comparing price with check-ins for each hotel
# One row per (month, hotel) carrying both the mean `adr` and the booking count.
df_count_price = pd.merge(df_adr_time, df_time, on=["month-year", "hotel"])
rh_count_price = df_count_price[df_count_price["hotel"] == "Resort Hotel"]
ch_count_price = df_count_price[df_count_price["hotel"] == "City Hotel"]

# Correlate only the two numeric columns. The frames also hold the text
# columns "month-year" and "hotel", and `DataFrame.corr()` raises on
# non-numeric data from pandas 2.0 on instead of silently dropping it.
ch_count_price[["adr", "count"]].corr()
rh_count_price[["adr", "count"]].corr()
# Matplotlib pie of market segments for non-cancelled resort bookings.
rh_checked["market_segment"].value_counts().plot(kind="pie", legend=True)

# Booking counts per market segment for each hotel (all bookings in `rh` /
# `ch`). `rename_axis(None).to_frame("market_segment")` keeps the counts in a
# column named "market_segment" on every pandas version; a bare `to_frame()`
# names it "count" on pandas >= 2.0, which broke the `.market_segment` access.
mkt_sgmt_cnt_rh = (
    rh["market_segment"].value_counts().rename_axis(None).to_frame("market_segment")
)
mkt_sgmt_cnt_ch = (
    ch["market_segment"].value_counts().rename_axis(None).to_frame("market_segment")
)

# Two side-by-side pies: city hotel on the left, resort hotel on the right.
fig = make_subplots(rows=1, cols=2, specs=[[{"type": "domain"}, {"type": "domain"}]])
fig.add_trace(
    go.Pie(
        labels=mkt_sgmt_cnt_ch.index.tolist(),
        values=mkt_sgmt_cnt_ch["market_segment"].tolist(),
        name="City Hotel",
    ),
    1,
    1,
)
fig.add_trace(
    go.Pie(
        labels=mkt_sgmt_cnt_rh.index.tolist(),
        values=mkt_sgmt_cnt_rh["market_segment"].tolist(),
        name="Resort Hotel",
    ),
    1,
    2,
)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=0.4, hoverinfo="label+percent+name")
fig.update_layout(
    title_text="Market Segment Division",
    # Add annotations in the center of the donut pies.
    annotations=[
        dict(text="City Hotel", x=0.18, y=0.5, font_size=20, showarrow=False),
        dict(text="Resort Hotel", x=0.82, y=0.5, font_size=20, showarrow=False),
    ],
)
fig.show()
# Bookings flagged as coming from a repeated guest.
df_repeated = df.loc[df["is_repeated_guest"] == 1]

# Booking counts for every (hotel, repeated-guest flag) pair across all of `df`.
repeated_hotel_wise = (
    df.groupby(["hotel", "is_repeated_guest"]).size().reset_index(name="count")
)

fig = px.bar(
    repeated_hotel_wise,
    x="hotel",
    y="count",
    color="is_repeated_guest",
    barmode="group",
    title="Repeated Guests Hotel wise",
    template="plotly_dark",
)
fig.show()

# Cancelled vs. non-cancelled bookings among repeated guests, per hotel.
df_repeated.groupby(["hotel", "is_canceled"]).size()
# Index bookings by arrival date so they can be resampled over time.
date_df = df.set_index("datetime")

# One frame per customer type.
customer_transient = date_df[date_df["customer_type"] == "Transient"]
customer_transient_party = date_df[date_df["customer_type"] == "Transient-Party"]
customer_contract = date_df[date_df["customer_type"] == "Contract"]
customer_group = date_df[date_df["customer_type"] == "Group"]

# Month-end totals of the numeric columns for each customer type.
# * The frequency is given as an offset object rather than the string "m":
#   the lowercase alias is not canonical, and the string spelling of the
#   month-end frequency differs between pandas versions.
# * Only numeric columns are summed; from pandas 2.0 on `sum()` no longer
#   drops text columns by itself, and adding those up is meaningless here.
month_end = pd.offsets.MonthEnd()
customer_transient_m = (
    customer_transient.select_dtypes(include="number").resample(month_end).sum()
)
customer_transient_party_m = (
    customer_transient_party.select_dtypes(include="number").resample(month_end).sum()
)
customer_contract_m = (
    customer_contract.select_dtypes(include="number").resample(month_end).sum()
)
customer_group_m = (
    customer_group.select_dtypes(include="number").resample(month_end).sum()
)

# One line per customer type showing the monthly "Total Guests" sum.
fig = go.Figure()
for _trace_name, _monthly in (
    ("Transient Guests", customer_transient_m),
    ("Transient Party Guests", customer_transient_party_m),
    ("Contract Guests", customer_contract_m),
    ("Group Guests", customer_group_m),
):
    fig.add_trace(
        go.Scatter(x=_monthly.index, y=_monthly["Total Guests"], name=_trace_name)
    )
fig.update_layout(
    title="Number of Guests by Customer Type",
    xaxis_title="Arrival Date",
    yaxis_title="Number of Guests",
    width=1050,
)
fig.show()
# "Total Stay" samples per hotel; `group_labels` follows the same order.
group_labels = ["Resort Hotel", "City Hotel"]
hist_data = [rh["Total Stay"], ch["Total Stay"]]

# Distribution plot of the stay lengths, drawn without the rug.
fig = ff.create_distplot(
    hist_data,
    group_labels,
    show_rug=False,
)
fig.show()
# "Yes" where the assigned room type equals the reserved one, else "No".
got_reserved_rh = np.where(rh["reserved_room_type"] == rh["assigned_room_type"], "Yes", "No")
got_reserved_ch = np.where(ch["reserved_room_type"] == ch["assigned_room_type"], "Yes", "No")

# Yes/No tallies per hotel. `to_frame(0)` fixes the column label to 0, which
# the lookups below use; a bare `to_frame()` labels it "count" on
# pandas >= 2.0 and made `[0]` raise KeyError.
got_reserved_rh_count = pd.Series(got_reserved_rh).value_counts().to_frame(0)
got_reserved_ch_count = pd.Series(got_reserved_ch).value_counts().to_frame(0)
got_reserved_rh_count[0]

# Two side-by-side pies: city hotel on the left, resort hotel on the right.
fig = make_subplots(rows=1, cols=2, specs=[[{"type": "domain"}, {"type": "domain"}]])
fig.add_trace(
    go.Pie(
        labels=got_reserved_ch_count.index.tolist(),
        values=got_reserved_ch_count[0].tolist(),
        name="City Hotel",
    ),
    1,
    1,
)
fig.add_trace(
    go.Pie(
        labels=got_reserved_rh_count.index.tolist(),
        values=got_reserved_rh_count[0].tolist(),
        name="Resort Hotel",
    ),
    1,
    2,
)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=0.4, hoverinfo="label+percent+name")
fig.update_layout(
    title_text="If the assigned room is the same as reserved room",
    # Add annotations in the center of the donut pies.
    annotations=[
        dict(text="City Hotel", x=0.18, y=0.5, font_size=20, showarrow=False),
        dict(text="Resort Hotel", x=0.82, y=0.5, font_size=20, showarrow=False),
    ],
)
fig.show()
# Non-cancelled bookings that include at least one child or one baby.
df_family = df_checked.loc[df_checked["children"].gt(0) | df_checked["babies"].gt(0)]

# How many adults accompany those bookings.
df_family["adults"].value_counts()

# Baby counts for the bookings above that list no adult at all.
df_family.loc[df_family["adults"].eq(0), "babies"].value_counts()
# Annotated heatmap of pairwise correlations between the numeric columns of
# `data`. The columns are selected explicitly because `DataFrame.corr()`
# raises on text columns from pandas 2.0 on instead of skipping them.
sns.heatmap(data.select_dtypes(include="number").corr(), annot=True)